# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from warnings import filterwarnings
filterwarnings("ignore")
data=pd.read_csv(r"C:\Users\laxma\Downloads\Mobile_Price_Classification.csv")
data.shape
(2000, 21)
data.head()
| battery_power | blue | clock_speed | dual_sim | fc | four_g | int_memory | m_dep | mobile_wt | n_cores | ... | px_height | px_width | ram | sc_h | sc_w | talk_time | three_g | touch_screen | wifi | price_range | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 842 | 0 | 2.2 | 0 | 1 | 0 | 7 | 0.6 | 188 | 2 | ... | 20 | 756 | 2549 | 9 | 7 | 19 | 0 | 0 | 1 | 1 |
| 1 | 1021 | 1 | 0.5 | 1 | 0 | 1 | 53 | 0.7 | 136 | 3 | ... | 905 | 1988 | 2631 | 17 | 3 | 7 | 1 | 1 | 0 | 2 |
| 2 | 563 | 1 | 0.5 | 1 | 2 | 1 | 41 | 0.9 | 145 | 5 | ... | 1263 | 1716 | 2603 | 11 | 2 | 9 | 1 | 1 | 0 | 2 |
| 3 | 615 | 1 | 2.5 | 0 | 0 | 0 | 10 | 0.8 | 131 | 6 | ... | 1216 | 1786 | 2769 | 16 | 8 | 11 | 1 | 0 | 0 | 2 |
| 4 | 1821 | 1 | 1.2 | 0 | 13 | 1 | 44 | 0.6 | 141 | 2 | ... | 1208 | 1212 | 1411 | 8 | 2 | 15 | 1 | 1 | 0 | 1 |
5 rows × 21 columns
data.tail()
| battery_power | blue | clock_speed | dual_sim | fc | four_g | int_memory | m_dep | mobile_wt | n_cores | ... | px_height | px_width | ram | sc_h | sc_w | talk_time | three_g | touch_screen | wifi | price_range | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1995 | 794 | 1 | 0.5 | 1 | 0 | 1 | 2 | 0.8 | 106 | 6 | ... | 1222 | 1890 | 668 | 13 | 4 | 19 | 1 | 1 | 0 | 0 |
| 1996 | 1965 | 1 | 2.6 | 1 | 0 | 0 | 39 | 0.2 | 187 | 4 | ... | 915 | 1965 | 2032 | 11 | 10 | 16 | 1 | 1 | 1 | 2 |
| 1997 | 1911 | 0 | 0.9 | 1 | 1 | 1 | 36 | 0.7 | 108 | 8 | ... | 868 | 1632 | 3057 | 9 | 1 | 5 | 1 | 1 | 0 | 3 |
| 1998 | 1512 | 0 | 0.9 | 0 | 4 | 1 | 46 | 0.1 | 145 | 5 | ... | 336 | 670 | 869 | 18 | 10 | 19 | 1 | 1 | 1 | 0 |
| 1999 | 510 | 1 | 2.0 | 1 | 5 | 1 | 45 | 0.9 | 168 | 6 | ... | 483 | 754 | 3919 | 19 | 4 | 2 | 1 | 1 | 1 | 3 |
5 rows × 21 columns
data.isnull().sum()
battery_power 0 blue 0 clock_speed 0 dual_sim 0 fc 0 four_g 0 int_memory 0 m_dep 0 mobile_wt 0 n_cores 0 pc 0 px_height 0 px_width 0 ram 0 sc_h 0 sc_w 0 talk_time 0 three_g 0 touch_screen 0 wifi 0 price_range 0 dtype: int64
data.describe()
| battery_power | blue | clock_speed | dual_sim | fc | four_g | int_memory | m_dep | mobile_wt | n_cores | ... | px_height | px_width | ram | sc_h | sc_w | talk_time | three_g | touch_screen | wifi | price_range | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2000.000000 | 2000.0000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | ... | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 |
| mean | 1238.518500 | 0.4950 | 1.522250 | 0.509500 | 4.309500 | 0.521500 | 32.046500 | 0.501750 | 140.249000 | 4.520500 | ... | 645.108000 | 1251.515500 | 2124.213000 | 12.306500 | 5.767000 | 11.011000 | 0.761500 | 0.503000 | 0.507000 | 1.500000 |
| std | 439.418206 | 0.5001 | 0.816004 | 0.500035 | 4.341444 | 0.499662 | 18.145715 | 0.288416 | 35.399655 | 2.287837 | ... | 443.780811 | 432.199447 | 1084.732044 | 4.213245 | 4.356398 | 5.463955 | 0.426273 | 0.500116 | 0.500076 | 1.118314 |
| min | 501.000000 | 0.0000 | 0.500000 | 0.000000 | 0.000000 | 0.000000 | 2.000000 | 0.100000 | 80.000000 | 1.000000 | ... | 0.000000 | 500.000000 | 256.000000 | 5.000000 | 0.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 851.750000 | 0.0000 | 0.700000 | 0.000000 | 1.000000 | 0.000000 | 16.000000 | 0.200000 | 109.000000 | 3.000000 | ... | 282.750000 | 874.750000 | 1207.500000 | 9.000000 | 2.000000 | 6.000000 | 1.000000 | 0.000000 | 0.000000 | 0.750000 |
| 50% | 1226.000000 | 0.0000 | 1.500000 | 1.000000 | 3.000000 | 1.000000 | 32.000000 | 0.500000 | 141.000000 | 4.000000 | ... | 564.000000 | 1247.000000 | 2146.500000 | 12.000000 | 5.000000 | 11.000000 | 1.000000 | 1.000000 | 1.000000 | 1.500000 |
| 75% | 1615.250000 | 1.0000 | 2.200000 | 1.000000 | 7.000000 | 1.000000 | 48.000000 | 0.800000 | 170.000000 | 7.000000 | ... | 947.250000 | 1633.000000 | 3064.500000 | 16.000000 | 9.000000 | 16.000000 | 1.000000 | 1.000000 | 1.000000 | 2.250000 |
| max | 1998.000000 | 1.0000 | 3.000000 | 1.000000 | 19.000000 | 1.000000 | 64.000000 | 1.000000 | 200.000000 | 8.000000 | ... | 1960.000000 | 1998.000000 | 3998.000000 | 19.000000 | 18.000000 | 20.000000 | 1.000000 | 1.000000 | 1.000000 | 3.000000 |
8 rows × 21 columns
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2000 entries, 0 to 1999 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 battery_power 2000 non-null int64 1 blue 2000 non-null int64 2 clock_speed 2000 non-null float64 3 dual_sim 2000 non-null int64 4 fc 2000 non-null int64 5 four_g 2000 non-null int64 6 int_memory 2000 non-null int64 7 m_dep 2000 non-null float64 8 mobile_wt 2000 non-null int64 9 n_cores 2000 non-null int64 10 pc 2000 non-null int64 11 px_height 2000 non-null int64 12 px_width 2000 non-null int64 13 ram 2000 non-null int64 14 sc_h 2000 non-null int64 15 sc_w 2000 non-null int64 16 talk_time 2000 non-null int64 17 three_g 2000 non-null int64 18 touch_screen 2000 non-null int64 19 wifi 2000 non-null int64 20 price_range 2000 non-null int64 dtypes: float64(2), int64(19) memory usage: 328.2 KB
data.duplicated().sum()
0
data.columns
Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
'touch_screen', 'wifi', 'price_range'],
dtype='object')
#VISUALIZATION
plt.bar(data['blue'],data['fc'])
plt.xticks(rotation=90)
plt.show()
fig=px.bar(data,x='int_memory',y='ram',color='int_memory')
fig.show()
plt.scatter(data['mobile_wt'],data['pc'],color='yellowgreen')
plt.xticks(rotation=90)
plt.show()
plt.figure(figsize=(10,4))
sns.countplot(x='clock_speed', data=data, color='b')
plt.show()
plt.figure(figsize=(10,4))
top_car = data['talk_time'].value_counts().nlargest(10)
sns.countplot(y=data.talk_time,color='red')
<AxesSubplot:xlabel='count', ylabel='talk_time'>
sns.lineplot(x='clock_speed', y='wifi', data=data)
<AxesSubplot:xlabel='clock_speed', ylabel='wifi'>
sns.barplot(data['sc_h'],data['sc_w'],color='k')
plt.xticks(rotation=90)
plt.show()
plt.figure(figsize=(8, 4))
sns.scatterplot(data=data, x='px_height', y='px_width')
plt.xlabel('px_height')
plt.ylabel('px_width')
plt.show()
sns.displot(data["n_cores"])
<seaborn.axisgrid.FacetGrid at 0x23b9b1b26d0>
sns.countplot(x='four_g',data=data)
<AxesSubplot:xlabel='four_g', ylabel='count'>
sns.boxplot(x='four_g',y='fc',data=data)
<AxesSubplot:xlabel='four_g', ylabel='fc'>
sns.violinplot(x='blue',y='battery_power',data=data)
<AxesSubplot:xlabel='blue', ylabel='battery_power'>
#MODEL BUILDING
y = data["price_range"].values
x_data=data.drop(["price_range"],axis=1)
x = (x_data-np.min(x_data))/(np.max(x_data)-np.min(x_data))
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.2,random_state=1)
from sklearn.svm import SVC
svm=SVC(random_state=1)
svm.fit(x_train,y_train)
print("train accuracy:",svm.score(x_train,y_train))
print("test accuracy:",svm.score(x_test,y_test))
train accuracy: 0.97125 test accuracy: 0.8375